library(tidyverse)
library(quanteda)
library(stringi)
library(LSS)
library(broom)

# LSS

# Japan

## Load data
# Restore the objects stored in the Japanese .rda file into the current
# environment (presumably including `dfm_tweets_ja`, which is used below --
# confirm against the file that writes it), then read the term list that is
# later passed to textmodel_lss() as `terms`, one entry per line.
load(paste0(in_data_dir, "dfm_tweets_ja.rda"))
fl_keys_ja <- read_lines(paste0(in_data_dir, "fl_keys_ja.txt"))

## Past-Present model

### Past-Present polarity dictionary
# Seed words for the "history" pole and, position by position, their English
# glosses. The glosses are only used for the printed overview below.
his_list <- c("慰安婦", "竹島", "日本海", "歴史", "親日", "戦争協力企業", "歴史認識")
his_eng_list <- c("Comfort Women", "Takeshima", "Sea of Japan", "History",
                  "Pro-Japanese", "War criminal corporation",
                  "Historical awareness")

# Seed words for the "political" pole and their English glosses.
pol_list <- c("外交", "貿易", "経済", "安倍首相", "政府", "輸出")
pol_eng_list <- c("Diplomacy", "Trade", "Economy", "Prime Minister Abe",
                  "Government", "Export")

# Print both seed lists as "term (gloss)" pairs, one pole per line.
cat(paste(
  c(paste(sprintf("%s (%s)", his_list, his_eng_list), collapse = ", "),
    paste(sprintf("%s (%s)", pol_list, pol_eng_list), collapse = ", ")),
  collapse = "\n"
))

# Key order matters: as.seedwords() is applied to this dictionary with its
# default arguments further down, and the later code labels scores above the
# threshold "Past" and scores below it "Present".
polarity_dic_hist_ja <- dictionary(list(history = his_list, political = pol_list))

### Estimation
# Fit the Past-Present LSS model on the Japanese dfm. The history/political
# dictionary supplies the seed words and `fl_keys_ja` is passed as `terms`.
lss_out_hist_ja <- textmodel_lss(
  dfm_tweets_ja,
  seeds = as.seedwords(polarity_dic_hist_ja),
  terms = fl_keys_ja
)

# Plot the fitted term scores with the seed words highlighted; ggsave() is
# called without `plot`, so it writes the most recently created plot.
textplot_scale1d(
  lss_out_hist_ja,
  highlighted = unlist(polarity_dic_hist_ja)
)
ggsave("lss_coef_hist_ja.png", path = fig_dir, width = 8, height = 5)


### Prediction
# Document-level scores from the Past-Present model.
lss_pred_hist_ja <- predict(lss_out_hist_ja, dfm_tweets_ja)

# Per topic flag (columns fl..wl) and camp: count the tweets scored above .2
# ("Past") and below -.2 ("Present"), then express the two counts as shares of
# their sum. Tweets scored within [-.2, .2] or with a missing score are not
# counted in either bucket.
data_focus_ja <-
  docvars(dfm_tweets_ja) %>%
  mutate(
    lss_pred = lss_pred_hist_ja,
    # NOTE(review): every cluster other than "Left-wing" is labelled
    # "Rightist" here -- confirm the Japanese data has no further clusters.
    Left = ifelse(cluster_title_short == "Left-wing", "Leftist", "Rightist")
  ) %>%
  # One row per (tweet, topic flag); keep only the topics the tweet belongs to.
  pivot_longer(fl:wl, names_to = "cat", values_to = "cat_value") %>%
  filter(cat_value == TRUE) %>%
  group_by(cat, Left) %>%
  summarise(Past = sum(lss_pred > .2, na.rm = TRUE),
            Present = sum(lss_pred < -.2, na.rm = TRUE)) %>%
  # Select the count columns by name rather than by position.
  pivot_longer(cols = c(Past, Present), names_to = "Focus") %>%
  group_by(cat, Left) %>%
  mutate(prop = value / sum(value)) %>%
  # Attach the topic labels (data_topic_title is defined outside this file).
  merge(data_topic_title, by = "cat") %>%
  mutate(Country = "Japan")


## Sentiment polarity model

### dictionary
# Negative seed words with English glosses at matching positions.
negative_list <- c(
  "うんざり", "無能", "怒る", "怒り", "無知", "無駄",
  "ゴミ", "アホ", "バカ", "嫌い", "嫌"
)
negative_eng_list <- c(
  "annoyed", "incompetent", "angry", "anger", "ignorant", "waste",
  "rubbish", "fool", "idiot", "dislike", "hate"
)

# Positive seed words with English glosses at matching positions.
positive_list <- c(
  "感謝", "最高", "友好", "成功", "盛り上げ", "嬉しい",
  "素晴らしい", "仲良く", "幸せ"
)
positive_eng_list <- c(
  "gratitude", "best", "friendship", "success", "vitalize", "pleased",
  "marvellous", "befriend", "happy"
)

# Print the seeds as "term (gloss)" pairs: positive line first, then negative.
cat(paste(
  c(paste(sprintf("%s (%s)", positive_list, positive_eng_list), collapse = ", "),
    paste(sprintf("%s (%s)", negative_list, negative_eng_list), collapse = ", ")),
  collapse = "\n"
))

# Sentiment seed dictionary; `positive` is the first key, `negative` the second.
polarity_dic <- dictionary(list(positive = positive_list, negative = negative_list))

### Estimation
# Fit the sentiment LSS model on the Japanese dfm, seeded by `polarity_dic`
# and with `fl_keys_ja` passed as `terms`.
lss_out_sentiment_ja <- textmodel_lss(
  dfm_tweets_ja,
  seeds = as.seedwords(polarity_dic),
  terms = fl_keys_ja
)

# Plot the term scores with the seed words highlighted and save the figure.
textplot_scale1d(
  lss_out_sentiment_ja,
  highlighted = unlist(polarity_dic)
)
ggsave("lss_sentiment_coef_ja.png", path = fig_dir, width = 8, height = 5)


### Prediction
# Document-level scores from the sentiment model.
lss_pred_sentiment_ja <- predict(lss_out_sentiment_ja, dfm_tweets_ja)

# Share of negative tweets per topic flag, camp and Past/Present focus.
data_polarity_ja <- docvars(dfm_tweets_ja) %>%
  mutate(
    lss_pred = lss_pred_hist_ja,                 # Past-Present score
    lss_pred_sentiment = lss_pred_sentiment_ja,  # sentiment score
    # NOTE(review): every cluster other than "Left-wing" is labelled
    # "Rightist" here -- confirm the Japanese data has no further clusters.
    Left = ifelse(cluster_title_short == "Left-wing", "Leftist", "Rightist")
  ) %>%
  # One row per (tweet, topic flag); keep only the topics the tweet belongs to.
  pivot_longer(fl:wl, names_to = "cat", values_to = "cat_value") %>%
  filter(cat_value == TRUE) %>%
  # Scores that are missing or within [-.2, .2] fall through to "Neutral".
  mutate(Polarity = case_when(lss_pred_sentiment > .2 ~ "Positive",
                              lss_pred_sentiment < -.2 ~ "Negative",
                              TRUE ~ "Neutral")) %>%
  # No default branch: tweets with no clear focus get NA and are removed by
  # drop_na() below.
  mutate(Focus = case_when(lss_pred > .2 ~ "Past",
                           lss_pred < -.2 ~ "Present")) %>%
  group_by(cat, Left) %>%
  count(Polarity, Focus) %>%
  drop_na() %>%
  # The denominator covers all three polarity classes within a
  # (topic, camp, focus) cell; only the "Negative" share is kept.
  group_by(cat, Left, Focus) %>%
  mutate(prop = n / sum(n)) %>%
  filter(Polarity == "Negative") %>%
  # Attach the topic labels (data_topic_title is defined outside this file).
  merge(data_topic_title, by = "cat") %>%
  mutate(Country = "Japan")

# Tweet-level table for the target analysis: document name, both LSS scores
# (plus any other docvar whose name starts with "lss_") and the camp label.
data_lss_pred_ja <- docvars(dfm_tweets_ja) %>%
  mutate(
    lss_pred_hist = lss_pred_hist_ja,
    lss_pred_sentiment = lss_pred_sentiment_ja,
    id_str = row.names(dfm_tweets_ja),
    Left = ifelse(cluster_title_short == "Left-wing", "Leftist", "Rightist")
  ) %>%
  select(id_str, starts_with("lss_"), Left)



# Korea
## Load data
# Restore the objects stored in the Korean .rda file into the current
# environment (presumably including `dfm_tweets_ko`, which is used below --
# confirm against the file that writes it), then read the term list that is
# later passed to textmodel_lss() as `terms`, one entry per line.
load(paste0(in_data_dir, "dfm_tweets_ko.rda"))
fl_keys_ko <- read_lines(paste0(in_data_dir, "fl_keys_ko.txt"))

## Past-Present model

### Past-Present polarity dictionary
# Seed words for the "history" pole and, position by position, their English
# glosses. The glosses are only used for the printed overview below.
his_list <- c("독재", "광복", "역사", "건국", "이승만", "일제강점기",
              "독도", "동해", "위안부", "친일", "전범기업")
his_eng_list <- c("Dictatorship", "Liberation", "History", "Founding of nation",
                  "Seung-Man Rhee", "Japanese occupation period", "Dokdo", "East Sea",
                  "Comfort Women", "Pro-Japanese", "War criminal corporation")

# Seed words for the "political" pole and their English glosses.
# The party name and its abbreviation used to be one string
# ("자유한국당, 자한당"), which is unlikely to match any single feature of the
# dfm. They are now two separate seeds, each with its own gloss so the two
# vectors stay the same length.
# NOTE(review): this changes the effective seed set, so the fitted model and
# downstream figures may differ from earlier runs.
pol_list <- c("정부", "촛불", "대법원", "외교부", "사법부",
              "수출", "자유한국당", "자한당", "민주당", "무역",
              "한일의원연맹", "의원", "장관")
pol_eng_list <- c("Government", "Candlelight", "Supreme court", "Foreign ministry",
                  "Judiciary", "Export", "Free korea party",
                  "Free korea party (abbr.)", "Democratic party",
                  "Trade", "Japan-Korea Parliamentarians' Union", "MP", "Minister")

# Print both seed lists as "term (gloss)" pairs, one pole per line.
cat(paste(
  c(paste(sprintf("%s (%s)", his_list, his_eng_list), collapse = ", "),
    paste(sprintf("%s (%s)", pol_list, pol_eng_list), collapse = ", ")),
  collapse = "\n"
))

# Key order matters: as.seedwords() is applied to this dictionary with its
# default arguments further down, and the later code labels scores above the
# threshold "Past" and scores below it "Present".
polarity_dic_hist_ko <- dictionary(list(history = his_list, political = pol_list))

### Estimation
# Fit the Past-Present LSS model on the Korean dfm. The history/political
# dictionary supplies the seed words and `fl_keys_ko` is passed as `terms`.
lss_out_hist_ko <- textmodel_lss(
  dfm_tweets_ko,
  seeds = as.seedwords(polarity_dic_hist_ko),
  terms = fl_keys_ko
)

# Plot the fitted term scores with the seed words highlighted; ggsave() is
# called without `plot`, so it writes the most recently created plot.
textplot_scale1d(
  lss_out_hist_ko,
  highlighted = unlist(polarity_dic_hist_ko)
)
ggsave("lss_coef_hist_ko.png", path = fig_dir, width = 8, height = 5)



### Prediction
# Document-level scores from the Past-Present model.
lss_pred_hist_ko <- predict(lss_out_hist_ko, dfm_tweets_ko)

# Per topic flag (columns fl..wl) and camp: count the tweets scored above .2
# ("Past") and below -.2 ("Present"), then express the two counts as shares of
# their sum. Tweets scored within [-.2, .2] or with a missing score are not
# counted in either bucket.
data_focus_ko <-
  docvars(dfm_tweets_ko) %>%
  mutate(lss_pred = lss_pred_hist_ko) %>%
  # Only the two named camps are kept; any other cluster is dropped.
  filter(cluster_title_short %in% c("Leftist", "Rightist")) %>%
  rename(Left = cluster_title_short) %>%
  # One row per (tweet, topic flag); keep only the topics the tweet belongs to.
  pivot_longer(fl:wl, names_to = "cat", values_to = "cat_value") %>%
  filter(cat_value == TRUE) %>%
  group_by(cat, Left) %>%
  summarise(Past = sum(lss_pred > .2, na.rm = TRUE),
            Present = sum(lss_pred < -.2, na.rm = TRUE)) %>%
  # Select the count columns by name rather than by position.
  pivot_longer(cols = c(Past, Present), names_to = "Focus") %>%
  group_by(cat, Left) %>%
  mutate(prop = value / sum(value)) %>%
  # Attach the topic labels (data_topic_title is defined outside this file).
  merge(data_topic_title, by = "cat") %>%
  mutate(Country = "Korea")

## Sentiment polarity model

### dictionary

# Positive seed words with English glosses at matching positions.
positive_list <- c(
  "고맙다", "행복하다", "멋지다", "여유", "완벽하다",
  "아름답다", "행복", "화이팅", "해내다", "함께",
  "기쁘다", "대박", "성공"
)
positive_eng_list <- c(
  "grateful", "happy", "great", "relaxed", "perfect", "beautiful",
  "happiness", "cheer up", "achieve", "together", "glad", "awesome",
  "success"
)

# Negative seed words with English glosses at matching positions.
negative_list <- c(
  "짜증", "시끄럽다", "빡치다", "화가", "스트레스",
  "무식", "헛소리", "구역질", "노답", "쓰레기", "악마",
  "멍청하다", "역겹다"
)
negative_eng_list <- c(
  "annoyed", "shut up", "got angry", "anger", "stress", "ignorant",
  "gibberish", "disgusted", "incurable", "waste", "devil", "stupid",
  "disgusting"
)

# Print the seeds as "term (gloss)" pairs: positive line first, then negative.
cat(paste(
  c(paste(sprintf("%s (%s)", positive_list, positive_eng_list), collapse = ", "),
    paste(sprintf("%s (%s)", negative_list, negative_eng_list), collapse = ", ")),
  collapse = "\n"
))

# Sentiment seed dictionary; `positive` is the first key, `negative` the second.
# This overwrites any `polarity_dic` defined earlier in the script.
polarity_dic <- dictionary(list(positive = positive_list, negative = negative_list))

### Estimation
# Fit the sentiment LSS model on the Korean dfm, seeded by `polarity_dic`
# and with `fl_keys_ko` passed as `terms`.
lss_out_sentiment_ko <- textmodel_lss(
  dfm_tweets_ko,
  seeds = as.seedwords(polarity_dic),
  terms = fl_keys_ko
)

# Plot the term scores with the seed words highlighted and save the figure.
textplot_scale1d(
  lss_out_sentiment_ko,
  highlighted = unlist(polarity_dic)
)
ggsave("lss_sentiment_coef_ko.png", path = fig_dir, width = 8, height = 5)


### Prediction
# Document-level scores from the sentiment model.
lss_pred_sentiment_ko <- predict(lss_out_sentiment_ko, dfm_tweets_ko)

# Share of negative tweets per topic flag, camp and Past/Present focus.
data_polarity_ko <- docvars(dfm_tweets_ko) %>%
  mutate(
    lss_pred = lss_pred_hist_ko,                 # Past-Present score
    lss_pred_sentiment = lss_pred_sentiment_ko   # sentiment score
  ) %>%
  # Only the two named camps are kept; any other cluster is dropped.
  filter(cluster_title_short %in% c("Leftist", "Rightist")) %>%
  rename(Left = cluster_title_short) %>%
  # One row per (tweet, topic flag); keep only the topics the tweet belongs to.
  pivot_longer(fl:wl, names_to = "cat", values_to = "cat_value") %>%
  filter(cat_value == TRUE) %>%
  # Scores that are missing or within [-.2, .2] fall through to "Neutral".
  mutate(Polarity = case_when(lss_pred_sentiment > .2 ~ "Positive",
                              lss_pred_sentiment < -.2 ~ "Negative",
                              TRUE ~ "Neutral")) %>%
  # No default branch: tweets with no clear focus get NA and are removed by
  # drop_na() below.
  mutate(Focus = case_when(lss_pred > .2 ~ "Past",
                           lss_pred < -.2 ~ "Present")) %>%
  group_by(cat, Left) %>%
  count(Polarity, Focus) %>%
  drop_na() %>%
  # The denominator covers all three polarity classes within a
  # (topic, camp, focus) cell; only the "Negative" share is kept.
  group_by(cat, Left, Focus) %>%
  mutate(prop = n / sum(n)) %>%
  filter(Polarity == "Negative") %>%
  # Attach the topic labels (data_topic_title is defined outside this file).
  merge(data_topic_title, by = "cat") %>%
  mutate(Country = "Korea")

# Tweet-level table for the target analysis: document name, both LSS scores
# (plus any other docvar whose name starts with "lss_") and the camp label.
# Unlike the focus/polarity tables, no cluster is filtered out here.
data_lss_pred_ko <- docvars(dfm_tweets_ko) %>%
  mutate(
    lss_pred_hist = lss_pred_hist_ko,
    lss_pred_sentiment = lss_pred_sentiment_ko,
    id_str = row.names(dfm_tweets_ko)
  ) %>%
  rename(Left = cluster_title_short) %>%
  select(id_str, starts_with("lss_"), Left)


## Generating figures
# Figure 3: stacked Past/Present shares per camp, faceted by topic (rows) and
# country (columns). `topic` is expected to come from the data_topic_title
# merge done when the focus tables were built.
ggplot(
  bind_rows(data_focus_ja, data_focus_ko),
  aes(x = Left, y = prop, group = Focus, fill = Focus)
) +
  geom_col() +
  facet_grid(topic ~ Country) +
  coord_flip() +
  scale_x_discrete("") +
  scale_y_continuous("Proportion") +
  scale_fill_grey() +
  theme_minimal() +
  theme(legend.position = "bottom")

# Both calls save the plot created just above; note the differing heights.
ggsave("lss_pred_issues_combined.png", path = fig_dir, width = 8, height = 3.5)
ggsave("figure3.pdf", path = fig_dir, width = 8, height = 4)

# Figure 4: share of negative tweets per camp, with Past and Present bars side
# by side, faceted by topic (rows) and country (columns).
ggplot(
  bind_rows(data_polarity_ja, data_polarity_ko),
  aes(x = Left, y = prop, group = Focus, fill = Focus)
) +
  geom_col(position = position_dodge()) +
  facet_grid(topic ~ Country) +
  coord_flip() +
  scale_x_discrete("") +
  scale_y_continuous("Proportion of Negative Tweets") +
  scale_fill_grey() +
  theme_minimal() +
  theme(legend.position = "bottom")

# Both calls save the plot created just above.
ggsave("lss_sentiment_focus_pred_issues_combined.png",
       path = fig_dir, width = 8, height = 4)
ggsave("figure4.pdf", path = fig_dir, width = 8, height = 4)



# Target analysis

# Japanese-language terms used to detect which group a tweet refers to.
net_rightist <- c("ネトウヨ", "右翼", "ウヨ", "愛国者", "右派", "右")
net_leftist <- c("サヨ", "左翼", "サヨク", "パヨク", "売国奴", "左派", "左")
jp_government <- c("安倍", "安倍首相", "官邸", "内閣", "安倍晋三", "日本政府")
ko_government <- c("ムン", "ムンジェイン", "ムン・ジェイン", "文在寅", "青瓦台", "韓国政府")

# One dictionary key per target group.
dict_target_ja <- dictionary(
  list(
    net_rightist = net_rightist,
    net_leftist = net_leftist,
    jp_government = jp_government,
    ko_government = ko_government
  )
)

# Per-tweet match counts for each target key, as a plain data frame.
data_target_ja <- convert(dfm_lookup(dfm_tweets_ja, dict_target_ja), to = "data.frame")


# Korean-language terms used to detect which group a tweet refers to. These
# assignments reuse (and overwrite) the vector names of the Japanese lists.
# NOTE(review): "일본 정부" and "일본 외상" contain a space; verify that the dfm
# has matching multi-word features, otherwise these entries cannot match.
net_rightist <- c("자유한국당", "자한당", "박근혜", "박정희", "보수", "우파")
net_leftist <- c("민주당", "진보", "좌파", "친노", "친문", "노무현", "김대중")
jp_government <- c("아베", "일본 정부", "일본 외상")
ko_government <- c("문", "문재인", "문죄인", "문제인", "청와대", "정부", "韓", "文")

# One dictionary key per target group.
dict_target_ko <- dictionary(
  list(
    net_rightist = net_rightist,
    net_leftist = net_leftist,
    jp_government = jp_government,
    ko_government = ko_government
  )
)

# Per-tweet match counts for each target key, as a plain data frame.
data_target_ko <- convert(dfm_lookup(dfm_tweets_ko, dict_target_ko), to = "data.frame")

## 
# Build the Leftist-vs-Rightist negativity table for one country and write it
# to a CSV file.
#
# Arguments:
#   lss_pred     Tweet-level data frame with at least `Left` and
#                `lss_pred_sentiment`.
#   target_hits  Dictionary match counts with the columns
#                net_rightist .. ko_government. It is column-bound to
#                `lss_pred`, so its rows are assumed to be in the same tweet
#                order -- TODO confirm.
#   out_file     Path of the CSV file to write.
#
# Reads the global `data_target` (defined outside this file), which is merged
# in on the columns it shares with the result, presumably `target`.
#
# Returns the written table invisibly (the value of write_csv()).
write_target_table <- function(lss_pred, target_hits, out_file) {
  # Share of negative tweets in one nested group; missing scores are ignored.
  neg_share <- function(d) mean(d$lss_pred_sentiment, na.rm = TRUE)

  # Counts of non-negative (0) and negative (1) tweets. Fixing the levels keeps
  # both cells even when a group has only one kind of tweet; a bare table()
  # would then have length 1 and be silently recycled by rbind(), giving a
  # wrong contingency table.
  neg_counts <- function(d) table(factor(d$lss_pred_sentiment, levels = 0:1))

  lss_pred %>%
    bind_cols(target_hits) %>%
    filter(Left %in% c("Leftist", "Rightist")) %>%
    # One row per (tweet, target); a tweet that mentions several targets
    # contributes to each of them.
    pivot_longer(net_rightist:ko_government,
                 names_to = "target", values_to = "target_value") %>%
    filter(target_value > 0) %>%
    # Standardise over all remaining rows (both camps, all targets pooled) and
    # flag a row as negative when it lies more than .3 SD below the mean.
    mutate(lss_pred_sentiment = scale(lss_pred_sentiment)) %>%
    select(target, Left, lss_pred_sentiment) %>%
    mutate(lss_pred_sentiment = as.integer(lss_pred_sentiment < -.3)) %>%
    # One row per target with a nested data frame per camp.
    group_by(target, Left) %>%
    nest() %>%
    ungroup() %>%
    pivot_wider(names_from = Left, values_from = data) %>%
    mutate(
      pct_neg_left = map_dbl(Leftist, neg_share),
      pct_neg_right = map_dbl(Rightist, neg_share),
      # Fisher's exact test on the 2 x 2 table camp x negative.
      p_value = map2_dbl(
        Leftist, Rightist,
        ~ fisher.test(rbind(neg_counts(.x), neg_counts(.y)))$p.value
      )
    ) %>%
    select(target, pct_neg_left, pct_neg_right, p_value) %>%
    merge(data_target) %>%
    mutate_at(2:4, round, 3) %>%
    # Column 5 is the first column contributed by data_target (presumably the
    # display label of the target); columns 2:4 are the three statistics.
    select(5, 2:4) %>%
    # The "Pct Neg" columns hold proportions between 0 and 1.
    rename(`Pct Neg (Left)` = 2, `Pct Neg (Right)` = 3, `p-value` = 4) %>%
    write_csv(out_file)
}

write_target_table(data_lss_pred_ja, data_target_ja,
                   paste0(out_data_dir, "table_target_ja.csv"))

write_target_table(data_lss_pred_ko, data_target_ko,
                   paste0(out_data_dir, "table_target_ko.csv"))

